### Load and explore data

# Load the monthly macro dataset from Excel and preview the first rows.
# NOTE(review): hard-coded user-specific absolute path — consider a relative
# path or here::here() so the script runs on other machines.
dataPath <- "/Users/rolam/Documents/Uchicago MSCA/MSCA Time Series/Final Project"
# file.path() is the idiomatic, portable way to build paths
# (replaces paste(dataPath, 'TS_Project.xlsx', sep = "/")).
data <- read_excel(file.path(dataPath, "TS_Project.xlsx"))
head(data)
## # A tibble: 6 × 9
##   time                export import   cpi   ppi  bond sentiment  USDX uncertai…¹
##   <dttm>               <dbl>  <dbl> <dbl> <dbl> <dbl>     <dbl> <dbl>      <dbl>
## 1 2002-01-01 00:00:00   1.41   3.57  178.  128.  5.04      93    120.      117. 
## 2 2002-02-01 00:00:00   1.34   3.52  178   128.  4.91      90.7  119.       87.6
## 3 2002-03-01 00:00:00   1.76   3.79  178.  130.  5.28      95.7  119.       83.4
## 4 2002-04-01 00:00:00   1.46   4.45  179.  131.  5.21      93    115.       88.6
## 5 2002-05-01 00:00:00   1.63   4.36  180.  131.  5.16      96.9  112.       86.3
## 6 2002-06-01 00:00:00   1.76   4.48  180.  131.  4.93      92.4  106.       93.6
## # … with abbreviated variable name ¹​uncertainty
# Monthly time series of imports beginning January 2002.
import <- ts(data$import, start = c(2002,01), frequency = 12)
autoplot(import)

# Visual inspection of sub-periods. Pre-financial-crisis window:
import_1 <- window(import,start = c(2002,01), end = c(2007,12))
autoplot(import_1)

# Financial-crisis window.
import_2 <- window(import,start = c(2008,01), end = c(2010,12))
autoplot(import_2)

# Recovery window. NOTE(review): overlaps import_2 in 2009-2010 — confirm intentional.
import_3 <- window(import,start = c(2009,01), end = c(2014,12))
autoplot(import_3)

import_4 <- window(import,start = c(2015,01), end = c(2020,12))
autoplot(import_4)

# COVID-era window. NOTE(review): overlaps import_4 in 2020 — confirm intentional.
import_5 <- window(import,start = c(2020,01), end = c(2023,02))
autoplot(import_5)

# Variance-stabilizing Box-Cox transform with an estimated lambda.
import_boxcox <- BoxCox(import, BoxCox.lambda(import))
plot(import_boxcox)

# KPSS test: null hypothesis is level stationarity; a small p-value
# (0.01 in the output below) indicates the transformed series is non-stationary.
kpss.test(import_boxcox)
## Warning in kpss.test(import_boxcox): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  import_boxcox
## KPSS Level = 3.9837, Truncation lag parameter = 5, p-value = 0.01
# First (non-seasonal) difference of the Box-Cox series.
import_diff <- diff(import_boxcox,1)
tsdisplay(import_diff)

# KPSS on the differenced series: p-value at the 0.1 cap, so level
# stationarity is not rejected after one difference.
kpss.test(import_diff)
## Warning in kpss.test(import_diff): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  import_diff
## KPSS Level = 0.10007, Truncation lag parameter = 5, p-value = 0.1
# Seasonal difference (lag 12) of the Box-Cox series.
import_sdiff <- diff(import_boxcox,12)
tsdisplay(import_sdiff)

# KPSS: seasonal differencing alone also yields a stationary-looking series.
kpss.test(import_sdiff)
## Warning in kpss.test(import_sdiff): p-value greater than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  import_sdiff
## KPSS Level = 0.18626, Truncation lag parameter = 4, p-value = 0.1
# Seasonal decomposition of the raw import series.
# Loess-based STL with a fixed seasonal component; the second argument is
# s.window, now passed by name instead of positionally for clarity.
s <- stl(import, s.window = "periodic")
plot(s)

# Classical multiplicative decomposition for comparison; `type` named
# for the same reason. Note `s` is deliberately reused/overwritten here.
s <- decompose(import, type = "multiplicative")
plot(s)

### Post-COVID predictions

# Post-COVID experiment: train on 2002-2021 (includes the COVID shock),
# hold out calendar year 2022 as the test set.
import_train_set <- window(import,start = c(2002,01), end = c(2021,12))
import_test_set <- window(import,start = c(2022,01), end = c(2022,12))
# Automatic SARIMA search on Box-Cox-transformed training data; trace shows candidates.
fit_sarima <- auto.arima(import_train_set, lambda = BoxCox.lambda(import_train_set), seasonal=TRUE, trace = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,0,2)(1,1,1)[12] with drift         : -640.9173
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : -297.8771
##  ARIMA(1,0,0)(1,1,0)[12] with drift         : -575.3879
##  ARIMA(0,0,1)(0,1,1)[12] with drift         : -475.6234
##  ARIMA(0,0,0)(0,1,0)[12]                    : -215.4073
##  ARIMA(2,0,2)(0,1,1)[12] with drift         : -633.2096
##  ARIMA(2,0,2)(1,1,0)[12] with drift         : -596.8912
##  ARIMA(2,0,2)(2,1,1)[12] with drift         : -650.0262
##  ARIMA(2,0,2)(2,1,0)[12] with drift         : -616.9904
##  ARIMA(2,0,2)(2,1,2)[12] with drift         : -650.02
##  ARIMA(2,0,2)(1,1,2)[12] with drift         : -646.7641
##  ARIMA(1,0,2)(2,1,1)[12] with drift         : -644.4222
##  ARIMA(2,0,1)(2,1,1)[12] with drift         : -644.1483
##  ARIMA(3,0,2)(2,1,1)[12] with drift         : -654.73
##  ARIMA(3,0,2)(1,1,1)[12] with drift         : -656.7768
##  ARIMA(3,0,2)(0,1,1)[12] with drift         : -652.7016
##  ARIMA(3,0,2)(1,1,0)[12] with drift         : -619.3719
##  ARIMA(3,0,2)(1,1,2)[12] with drift         : -665.6685
##  ARIMA(3,0,2)(0,1,2)[12] with drift         : -650.7142
##  ARIMA(3,0,2)(2,1,2)[12] with drift         : -656.6332
##  ARIMA(3,0,1)(1,1,2)[12] with drift         : -664.6093
##  ARIMA(4,0,2)(1,1,2)[12] with drift         : -666.605
##  ARIMA(4,0,2)(0,1,2)[12] with drift         : -660.8082
##  ARIMA(4,0,2)(1,1,1)[12] with drift         : -655.3268
##  ARIMA(4,0,2)(2,1,2)[12] with drift         : -657.136
##  ARIMA(4,0,2)(0,1,1)[12] with drift         : -662.8683
##  ARIMA(4,0,2)(2,1,1)[12] with drift         : -655.2902
##  ARIMA(4,0,1)(1,1,2)[12] with drift         : -668.8083
##  ARIMA(4,0,1)(0,1,2)[12] with drift         : -662.6621
##  ARIMA(4,0,1)(1,1,1)[12] with drift         : -657.2411
##  ARIMA(4,0,1)(2,1,2)[12] with drift         : -659.3252
##  ARIMA(4,0,1)(0,1,1)[12] with drift         : -664.6677
##  ARIMA(4,0,1)(2,1,1)[12] with drift         : -657.3808
##  ARIMA(4,0,0)(1,1,2)[12] with drift         : -670.2084
##  ARIMA(4,0,0)(0,1,2)[12] with drift         : -663.3315
##  ARIMA(4,0,0)(1,1,1)[12] with drift         : -658.9
##  ARIMA(4,0,0)(2,1,2)[12] with drift         : -660.5638
##  ARIMA(4,0,0)(0,1,1)[12] with drift         : -665.3332
##  ARIMA(4,0,0)(2,1,1)[12] with drift         : -659.1816
##  ARIMA(3,0,0)(1,1,2)[12] with drift         : -658.8556
##  ARIMA(5,0,0)(1,1,2)[12] with drift         : -665.9256
##  ARIMA(5,0,1)(1,1,2)[12] with drift         : -663.7188
##  ARIMA(4,0,0)(1,1,2)[12]                    : Inf
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(4,0,0)(1,1,2)[12] with drift         : -701.4062
## 
##  Best model: ARIMA(4,0,0)(1,1,2)[12] with drift
# Print the selected model: ARIMA(4,0,0)(1,1,2)[12] with drift (see output below).
fit_sarima
## Series: import_train_set 
## ARIMA(4,0,0)(1,1,2)[12] with drift 
## Box Cox transformation: lambda= -0.06418615 
## 
## Coefficients:
##          ar1     ar2     ar3      ar4     sar1     sma1     sma2   drift
##       0.5861  0.1695  0.3894  -0.2094  -0.6601  -0.0943  -0.5448  0.0062
## s.e.  0.0654  0.0716  0.0741   0.0696   0.5327   0.5195   0.3939  0.0011
## 
## sigma^2 = 0.002442:  log likelihood = 360.12
## AIC=-702.23   AICc=-701.41   BIC=-671.37
# Actuals through 2022 plotted against the 12-month forecast from the auto model.
import_new <- window(import,start = c(2002,01), end = c(2022,12))
predict_sarima <- forecast(fit_sarima,h=12,level=c(80, 95))
autoplot(import_new) + autolayer(predict_sarima$mean)

# Out-of-sample accuracy on the 2022 test year.
accuracy(predict_sarima, import_test_set)
##                        ME     RMSE       MAE        MPE     MAPE      MASE
## Training set  0.006347493 0.706554 0.5071453 -0.1173684 4.261379 0.3435838
## Test set     -0.532550963 2.365628 1.9123443 -2.2228595 7.474564 1.2955863
##                     ACF1 Theil's U
## Training set -0.04826276        NA
## Test set      0.40122740 0.8638763
# Ljung-Box residual diagnostics for the auto-selected model.
checkresiduals(fit_sarima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,0)(1,1,2)[12] with drift
## Q* = 15.76, df = 17, p-value = 0.5409
## 
## Model df: 7.   Total lags used: 24
# Extended ACF table to suggest candidate ARMA orders for manual models.
eacf(import_train_set)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x x x x x x x  x  x  x 
## 1 x x o x o o o o o x o  x  o  o 
## 2 x x o x o o o o o x o  x  x  o 
## 3 o x o x o o o o o o o  x  x  o 
## 4 o x x x o o o o o o o  x  x  o 
## 5 x o o x x o o o o o o  x  o  o 
## 6 x o o x o o o o o o o  x  x  o 
## 7 x x o x o x o o o o o  x  o  o
# Manual candidate: ARMA(2,3) suggested by the EACF, with the auto-selected
# seasonal part (1,1,2)[12] and drift retained.
fit_2 <- Arima(y = import_train_set, order = c(2,0,3), seasonal = c(1,1,2), lambda = BoxCox.lambda(import_train_set), include.drift = TRUE)
fit_2
## Series: import_train_set 
## ARIMA(2,0,3)(1,1,2)[12] with drift 
## Box Cox transformation: lambda= -0.06418615 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2     ma3     sar1     sma1     sma2
##       0.7100  0.2127  -0.1455  -0.0921  0.2919  -0.6185  -0.1294  -0.5373
## s.e.  0.1815  0.1756   0.1727   0.0876  0.0622   0.4358   0.4225   0.3223
##        drift
##       0.0061
## s.e.  0.0009
## 
## sigma^2 = 0.00245:  log likelihood = 359.92
## AIC=-699.85   AICc=-698.84   BIC=-665.56
# Ljung-Box residual diagnostics for fit_2.
checkresiduals(fit_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,3)(1,1,2)[12] with drift
## Q* = 18.457, df = 16, p-value = 0.2978
## 
## Model df: 8.   Total lags used: 24
# 12-month forecast from fit_2 and test-set accuracy.
import_new <- window(import,start = c(2002,01), end = c(2022,12))
predict_sarima <- forecast(fit_2,h=12,level=c(80, 95))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                        ME      RMSE       MAE         MPE     MAPE      MASE
## Training set  0.008726451 0.7060893 0.5112381 -0.07710986 4.273328 0.3463566
## Test set     -0.477158978 2.3099154 1.9143653 -2.01646596 7.489075 1.2969555
##                     ACF1 Theil's U
## Training set -0.03674625        NA
## Test set      0.33623722 0.8456735
# NOTE(review): duplicate of the checkresiduals(fit_2) call above — redundant.
checkresiduals(fit_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,3)(1,1,2)[12] with drift
## Q* = 18.457, df = 16, p-value = 0.2978
## 
## Model df: 8.   Total lags used: 24
# Same orders as fit_2 but without the drift term.
fit_3 <- Arima(y = import_train_set, order = c(2,0,3), seasonal = c(1,1,2), lambda = BoxCox.lambda(import_train_set))
fit_3
## Series: import_train_set 
## ARIMA(2,0,3)(1,1,2)[12] 
## Box Cox transformation: lambda= -0.06418615 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2     ma3     sar1     sma1     sma2
##       0.7304  0.2654  -0.1383  -0.1289  0.2718  -0.6403  -0.1251  -0.5505
## s.e.  0.1787  0.1782   0.1703   0.0843  0.0608   0.4502   0.4372   0.3396
## 
## sigma^2 = 0.002501:  log likelihood = 356.45
## AIC=-694.9   AICc=-694.07   BIC=-664.03
# Ljung-Box residual diagnostics for fit_3.
checkresiduals(fit_3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,3)(1,1,2)[12]
## Q* = 21.644, df = 16, p-value = 0.1551
## 
## Model df: 8.   Total lags used: 24
# 12-month forecast from fit_3 and test-set accuracy.
import_new <- window(import,start = c(2002,01), end = c(2022,12))
predict_sarima <- forecast(fit_3,h=12,level=c(80, 95))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                       ME      RMSE       MAE         MPE     MAPE      MASE
## Training set  0.04268768 0.7166455 0.5172131  0.06111645 4.313253 0.3504046
## Test set     -1.38907552 2.9972462 2.3628244 -5.46220786 9.246391 1.6007802
##                     ACF1 Theil's U
## Training set -0.03716487        NA
## Test set      0.45220940  1.099747
# NOTE(review): duplicate of the checkresiduals(fit_3) call above — redundant.
checkresiduals(fit_3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,3)(1,1,2)[12]
## Q* = 21.644, df = 16, p-value = 0.1551
## 
## Model df: 8.   Total lags used: 24
# Alternative differencing scheme: ordinary difference (d=1) with a
# non-seasonally-differenced seasonal part (1,0,2)[12] and drift.
fit_4 <- Arima(y = import_train_set, order = c(2,1,3), seasonal = c(1,0,2), lambda = BoxCox.lambda(import_train_set), include.drift = TRUE)
fit_4
## Series: import_train_set 
## ARIMA(2,1,3)(1,0,2)[12] with drift 
## Box Cox transformation: lambda= -0.06418615 
## 
## Coefficients:
##           ar1      ar2     ma1     ma2     ma3    sar1     sma1     sma2
##       -0.5503  -0.3619  0.1539  0.0855  0.1169  0.9878  -0.7800  -0.0088
## s.e.   0.2664   0.2041  0.2708  0.1512  0.1159  0.0081   0.0762   0.0755
##        drift
##       0.0088
## s.e.  0.0132
## 
## sigma^2 = 0.002482:  log likelihood = 372.11
## AIC=-724.22   AICc=-723.26   BIC=-689.46
# 12-month forecast from fit_4 and test-set accuracy.
import_new <- window(import,start = c(2002,01), end = c(2022,12))
predict_sarima <- forecast(fit_4,h=12,level=c(80, 95))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                       ME      RMSE       MAE       MPE     MAPE      MASE
## Training set  0.01429168 0.7174144 0.5256934 -0.271282 4.490586 0.3561498
## Test set     -1.56994208 3.1392563 2.4114279 -6.158123 9.420429 1.6337084
##                     ACF1 Theil's U
## Training set -0.04095854        NA
## Test set      0.48218851  1.153048
# Ljung-Box residual diagnostics for fit_4.
checkresiduals(fit_4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,3)(1,0,2)[12] with drift
## Q* = 18.536, df = 16, p-value = 0.2935
## 
## Model df: 8.   Total lags used: 24
# Simpler candidate with no seasonal differencing: AR(4) on first differences
# plus a single seasonal MA term.
fit_5 <- Arima(y = import_train_set, order = c(4,1,0), seasonal = c(0,0,1), lambda = BoxCox.lambda(import_train_set))
fit_5
## Series: import_train_set 
## ARIMA(4,1,0)(0,0,1)[12] 
## Box Cox transformation: lambda= -0.06418615 
## 
## Coefficients:
##           ar1      ar2     ar3      ar4    sma1
##       -0.2237  -0.1783  0.0769  -0.0843  0.4628
## s.e.   0.0644   0.0669  0.0681   0.0658  0.0513
## 
## sigma^2 = 0.004298:  log likelihood = 313.09
## AIC=-614.19   AICc=-613.83   BIC=-593.33
# 12-month forecast from fit_5 and test-set accuracy.
import_new <- window(import,start = c(2002,01), end = c(2022,12))
predict_sarima <- forecast(fit_5,h=12,level=c(80, 95))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                      ME      RMSE       MAE        MPE     MAPE      MASE
## Training set  0.1099882 0.9223215 0.6825108  0.5673499 5.924645 0.4623914
## Test set     -1.9481467 2.6477560 1.9521812 -7.7860686 7.800485 1.3225752
##                     ACF1 Theil's U
## Training set -0.07525873        NA
## Test set      0.02376887 0.9834012
# Ljung-Box diagnostics for fit_5: the tiny p-value in the output below
# indicates residual autocorrelation remains (model inadequate).
checkresiduals(fit_5)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,1,0)(0,0,1)[12]
## Q* = 87.034, df = 19, p-value = 1.108e-10
## 
## Model df: 5.   Total lags used: 24
# AR(4) on first differences combined with the (1,1,2)[12] seasonal part.
fit_6 <- Arima(y = import_train_set, order = c(4,1,0), seasonal = c(1,1,2), lambda = BoxCox.lambda(import_train_set))
fit_6
## Series: import_train_set 
## ARIMA(4,1,0)(1,1,2)[12] 
## Box Cox transformation: lambda= -0.06418615 
## 
## Coefficients:
##           ar1      ar2     ar3     ar4     sar1     sma1     sma2
##       -0.4038  -0.2004  0.2115  0.0507  -0.6512  -0.1173  -0.5411
## s.e.   0.0673   0.0721  0.0739  0.0682   0.5645   0.5530   0.4283
## 
## sigma^2 = 0.002477:  log likelihood = 356.47
## AIC=-696.94   AICc=-696.28   BIC=-669.54
# 12-month forecast from fit_6 and test-set accuracy.
import_new <- window(import,start = c(2002,01), end = c(2022,12))
predict_sarima <- forecast(fit_6,h=12,level=c(80, 95))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set  0.01101387 0.7143004 0.5117931 -0.2893103 4.267652 0.3467326
## Test set     -1.59825591 3.2141677 2.4742477 -6.2440867 9.670932 1.6762679
##                     ACF1 Theil's U
## Training set -0.03606185        NA
## Test set      0.51106767  1.178512
# Ljung-Box residual diagnostics for fit_6.
checkresiduals(fit_6)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,1,0)(1,1,2)[12]
## Q* = 17.72, df = 17, p-value = 0.4067
## 
## Model df: 7.   Total lags used: 24
# Deliberately simple benchmark SARIMA with drift.
fit_x <- Arima(y = import_train_set, order = c(2,1,0), seasonal = c(1,0,0), lambda = BoxCox.lambda(import_train_set), include.drift = TRUE)
fit_x
## Series: import_train_set 
## ARIMA(2,1,0)(1,0,0)[12] with drift 
## Box Cox transformation: lambda= -0.06418615 
## 
## Coefficients:
##           ar1      ar2    sar1   drift
##       -0.3513  -0.2367  0.6394  0.0085
## s.e.   0.0636   0.0628  0.0495  0.0061
## 
## sigma^2 = 0.00344:  log likelihood = 337.47
## AIC=-664.94   AICc=-664.68   BIC=-647.56
# Ljung-Box diagnostics for fit_x: small p-value below => residual autocorrelation.
checkresiduals(fit_x)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)(1,0,0)[12] with drift
## Q* = 48.948, df = 21, p-value = 0.0005101
## 
## Model df: 3.   Total lags used: 24
# 12-month forecast from fit_x and test-set accuracy.
predict_sarima <- forecast(fit_x,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2022,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                        ME     RMSE       MAE         MPE      MAPE      MASE
## Training set  0.008517631 0.819143 0.6125046  -0.2600732  5.324398 0.4149632
## Test set     -2.890094505 4.117066 2.8900945 -11.2719953 11.271995 1.9579982
##                     ACF1 Theil's U
## Training set -0.04919688        NA
## Test set      0.46156789  1.539797
# NOTE(review): duplicate of the checkresiduals(fit_x) call above — redundant.
checkresiduals(fit_x)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)(1,0,0)[12] with drift
## Q* = 48.948, df = 21, p-value = 0.0005101
## 
## Model df: 3.   Total lags used: 24
# Naive (random-walk) benchmark forecast for the same horizon; per the
# accuracy output below it is competitive with the SARIMA models on 2022.
import_new <- window(import,start = c(2002,01), end = c(2022,12))
predict_sarima <- naive(import_train_set,h=12,level=c(80, 95))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                      ME     RMSE       MAE       MPE     MAPE      MASE
## Training set  0.1011942 1.151524 0.8109652  0.439801 6.990740 0.5494175
## Test set     -1.3643099 2.235439 1.7168102 -5.656129 6.900791 1.1631147
##                    ACF1 Theil's U
## Training set -0.1961309        NA
## Test set     -0.0735884 0.8060089

### Pre-COVID predictions

# Pre-COVID experiment: train 2002-2018, test on 2019.
# NOTE(review): reuses the variable names from the post-COVID section above.
import_train_set <- window(import,start = c(2002,01), end = c(2018,12))
tsdisplay(diff(BoxCox(import_train_set, BoxCox.lambda(import_train_set)),1))

import_test_set <- window(import,start = c(2019,01), end = c(2019,12))
# Automatic SARIMA search on the pre-COVID training window.
fit_sarima <- auto.arima(import_train_set, lambda = BoxCox.lambda(import_train_set), seasonal=TRUE, trace = TRUE)
## 
##  Fitting models using approximations to speed things up...
## 
##  ARIMA(2,0,2)(1,1,1)[12] with drift         : -418.6819
##  ARIMA(0,0,0)(0,1,0)[12] with drift         : -135.3141
##  ARIMA(1,0,0)(1,1,0)[12] with drift         : -365.3952
##  ARIMA(0,0,1)(0,1,1)[12] with drift         : -286.4532
##  ARIMA(0,0,0)(0,1,0)[12]                    : -73.13522
##  ARIMA(2,0,2)(0,1,1)[12] with drift         : -414.5916
##  ARIMA(2,0,2)(1,1,0)[12] with drift         : -381.5546
##  ARIMA(2,0,2)(2,1,1)[12] with drift         : -423.6967
##  ARIMA(2,0,2)(2,1,0)[12] with drift         : -398.8311
##  ARIMA(2,0,2)(2,1,2)[12] with drift         : Inf
##  ARIMA(2,0,2)(1,1,2)[12] with drift         : -431.9346
##  ARIMA(2,0,2)(0,1,2)[12] with drift         : -412.5721
##  ARIMA(1,0,2)(1,1,2)[12] with drift         : -431.2832
##  ARIMA(2,0,1)(1,1,2)[12] with drift         : -427.5589
##  ARIMA(3,0,2)(1,1,2)[12] with drift         : -438.29
##  ARIMA(3,0,2)(0,1,2)[12] with drift         : -428.9483
##  ARIMA(3,0,2)(1,1,1)[12] with drift         : -431.9003
##  ARIMA(3,0,2)(2,1,2)[12] with drift         : -428.4381
##  ARIMA(3,0,2)(0,1,1)[12] with drift         : -431.1032
##  ARIMA(3,0,2)(2,1,1)[12] with drift         : -426.8746
##  ARIMA(3,0,1)(1,1,2)[12] with drift         : -437.4966
##  ARIMA(4,0,2)(1,1,2)[12] with drift         : -437.6136
##  ARIMA(3,0,3)(1,1,2)[12] with drift         : -436.2747
##  ARIMA(2,0,3)(1,1,2)[12] with drift         : -435.0346
##  ARIMA(4,0,1)(1,1,2)[12] with drift         : -439.8631
##  ARIMA(4,0,1)(0,1,2)[12] with drift         : -437.0589
##  ARIMA(4,0,1)(1,1,1)[12] with drift         : -431.7216
##  ARIMA(4,0,1)(2,1,2)[12] with drift         : -429.7259
##  ARIMA(4,0,1)(0,1,1)[12] with drift         : -439.2025
##  ARIMA(4,0,1)(2,1,1)[12] with drift         : -428.1675
##  ARIMA(4,0,0)(1,1,2)[12] with drift         : -441.5804
##  ARIMA(4,0,0)(0,1,2)[12] with drift         : -438.2225
##  ARIMA(4,0,0)(1,1,1)[12] with drift         : -433.5616
##  ARIMA(4,0,0)(2,1,2)[12] with drift         : -431.1525
##  ARIMA(4,0,0)(0,1,1)[12] with drift         : -440.3507
##  ARIMA(4,0,0)(2,1,1)[12] with drift         : -430.0511
##  ARIMA(3,0,0)(1,1,2)[12] with drift         : -433.5659
##  ARIMA(5,0,0)(1,1,2)[12] with drift         : -437.1763
##  ARIMA(5,0,1)(1,1,2)[12] with drift         : -435.1652
##  ARIMA(4,0,0)(1,1,2)[12]                    : -439.209
## 
##  Now re-fitting the best model(s) without approximations...
## 
##  ARIMA(4,0,0)(1,1,2)[12] with drift         : -470.4953
## 
##  Best model: ARIMA(4,0,0)(1,1,2)[12] with drift
# Print the selected pre-COVID model: ARIMA(4,0,0)(1,1,2)[12] with drift.
fit_sarima
## Series: import_train_set 
## ARIMA(4,0,0)(1,1,2)[12] with drift 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##          ar1     ar2     ar3      ar4     sar1     sma1     sma2   drift
##       0.5742  0.1824  0.3762  -0.2103  -0.5973  -0.1558  -0.4882  0.0077
## s.e.  0.0719  0.0778  0.0814   0.0758   0.6483   0.6369   0.4825  0.0013
## 
## sigma^2 = 0.004483:  log likelihood = 244.74
## AIC=-471.48   AICc=-470.5   BIC=-442.17
# Ljung-Box residual diagnostics for the auto-selected pre-COVID model.
checkresiduals(fit_sarima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,0)(1,1,2)[12] with drift
## Q* = 16.368, df = 17, p-value = 0.4979
## 
## Model df: 7.   Total lags used: 24
# Extended ACF table for the pre-COVID training window.
eacf(import_train_set)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x x x x x x x x x x x  x  x  x 
## 1 x x o x o o o o o o o  x  o  o 
## 2 x x o x o o o o o o o  x  o  o 
## 3 o x o x o o o o o o o  x  x  o 
## 4 o x x x o o o o o o o  x  x  o 
## 5 x o x x o o o o o o o  x  o  x 
## 6 x o o x o o o o o o o  x  o  x 
## 7 x o o x o o o o o o o  x  o  x
# 12-month (2019) forecast from the auto model and test-set accuracy.
predict_sarima <- forecast(fit_sarima,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                        ME      RMSE       MAE         MPE     MAPE      MASE
## Training set -0.002922984 0.5971973 0.4348440 -0.06344058 4.173952 0.3394823
## Test set     -0.286899439 0.9223954 0.6921775 -1.84513917 4.054110 0.5403824
##                     ACF1 Theil's U
## Training set -0.07165486        NA
## Test set     -0.22537498 0.4746857
# Same orders as the auto model but without drift.
fit_2 <- Arima(y = import_train_set, order = c(4,0,0), seasonal = c(1,1,2), lambda = BoxCox.lambda(import_train_set))
fit_2
## Series: import_train_set 
## ARIMA(4,0,0)(1,1,2)[12] 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##          ar1     ar2     ar3      ar4     sar1     sma1     sma2
##       0.5948  0.1984  0.3906  -0.1902  -0.6016  -0.1684  -0.4897
## s.e.  0.0726  0.0783  0.0819   0.0757   0.7624   0.7522   0.5821
## 
## sigma^2 = 0.004564:  log likelihood = 241.96
## AIC=-467.92   AICc=-467.13   BIC=-441.86
# Ljung-Box residual diagnostics for fit_2.
checkresiduals(fit_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,0)(1,1,2)[12]
## Q* = 19.165, df = 17, p-value = 0.3191
## 
## Model df: 7.   Total lags used: 24
# 2019 forecast from fit_2 and test-set accuracy.
predict_sarima <- forecast(fit_2,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                      ME      RMSE       MAE        MPE     MAPE      MASE
## Training set 0.01703515 0.6043885 0.4360069 0.02761584 4.172384 0.3403902
## Test set     0.25747719 0.9219941 0.7716054 1.23301803 4.459348 0.6023917
##                    ACF1 Theil's U
## Training set -0.0681104        NA
## Test set     -0.2571047 0.5167796
# NOTE(review): duplicate of the checkresiduals(fit_2) call above — redundant.
checkresiduals(fit_2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,0)(1,1,2)[12]
## Q* = 19.165, df = 17, p-value = 0.3191
## 
## Model df: 7.   Total lags used: 24
# Manual candidate with a single seasonal MA term and drift.
fit_3 <- Arima(y = import_train_set, order = c(3,0,1), seasonal = c(0,1,1), lambda = BoxCox.lambda(import_train_set),include.drift = TRUE)
fit_3
## Series: import_train_set 
## ARIMA(3,0,1)(0,1,1)[12] with drift 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##          ar1     ar2     ar3     ma1     sma1   drift
##       0.1629  0.3527  0.3992  0.3827  -0.7975  0.0078
## s.e.  0.1501  0.1123  0.0736  0.1541   0.0629  0.0015
## 
## sigma^2 = 0.004493:  log likelihood = 243.12
## AIC=-472.25   AICc=-471.64   BIC=-449.45
# Ljung-Box residual diagnostics for fit_3.
checkresiduals(fit_3)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,1)(0,1,1)[12] with drift
## Q* = 23.927, df = 19, p-value = 0.199
## 
## Model df: 5.   Total lags used: 24
# 2019 forecast from fit_3 and test-set accuracy.
predict_sarima <- forecast(fit_3,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                        ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.004285681 0.5991217 0.4394703 -0.1082429 4.220172 0.3430941
## Test set     -0.345944372 0.9189453 0.6927064 -2.2098641 4.085667 0.5407953
##                     ACF1 Theil's U
## Training set -0.05043877        NA
## Test set     -0.25616704  0.474047
# fit_3 without the drift term.
fit_4 <- Arima(y = import_train_set, order = c(3,0,1), seasonal = c(0,1,1), lambda = BoxCox.lambda(import_train_set))
fit_4
## Series: import_train_set 
## ARIMA(3,0,1)(0,1,1)[12] 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##          ar1     ar2     ar3     ma1     sma1
##       0.2039  0.3700  0.4193  0.3655  -0.7999
## s.e.  0.1479  0.1163  0.0749  0.1544   0.0612
## 
## sigma^2 = 0.004552:  log likelihood = 240.84
## AIC=-469.68   AICc=-469.23   BIC=-450.14
# 2019 forecast from fit_4 and test-set accuracy.
predict_sarima <- forecast(fit_4,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                      ME      RMSE       MAE         MPE     MAPE      MASE
## Training set 0.01105571 0.6053335 0.4395407 -0.04571701 4.211272 0.3431491
## Test set     0.09593566 0.8641353 0.7096519  0.29891123 4.129886 0.5540246
##                     ACF1 Theil's U
## Training set -0.05098794        NA
## Test set     -0.28332728 0.4728396
# Ljung-Box residual diagnostics for fit_4.
checkresiduals(fit_4)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,0,1)(0,1,1)[12]
## Q* = 25.144, df = 19, p-value = 0.1559
## 
## Model df: 5.   Total lags used: 24
# EACF-style ARMA(2,3) with a single seasonal MA term and drift.
fit_5 <- Arima(y = import_train_set, order = c(2,0,3), seasonal = c(0,1,1), lambda = BoxCox.lambda(import_train_set), include.drift = TRUE)
fit_5
## Series: import_train_set 
## ARIMA(2,0,3)(0,1,1)[12] with drift 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2     ma3     sma1   drift
##       0.6906  0.2258  -0.1437  -0.0975  0.2693  -0.7826  0.0077
## s.e.  0.2094  0.2005   0.2018   0.0965  0.0663   0.0651  0.0013
## 
## sigma^2 = 0.004487:  log likelihood = 243.88
## AIC=-471.77   AICc=-470.98   BIC=-445.71
# 2019 forecast from fit_5 and test-set accuracy.
predict_sarima <- forecast(fit_5,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                        ME      RMSE       MAE         MPE     MAPE      MASE
## Training set -0.001229731 0.6009981 0.4409565 -0.04656622 4.203558 0.3442544
## Test set     -0.219495379 0.9144917 0.7090901 -1.45451304 4.149860 0.5535861
##                     ACF1 Theil's U
## Training set -0.05393457        NA
## Test set     -0.18704421 0.4808977
# Ljung-Box residual diagnostics for fit_5.
checkresiduals(fit_5)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,3)(0,1,1)[12] with drift
## Q* = 21.636, df = 18, p-value = 0.2485
## 
## Model df: 6.   Total lags used: 24
# fit_5 without the drift term (include.drift explicitly FALSE).
fit_6 <- Arima(y = import_train_set, order = c(2,0,3), seasonal = c(0,1,1), lambda = BoxCox.lambda(import_train_set), include.drift = FALSE)
fit_6
## Series: import_train_set 
## ARIMA(2,0,3)(0,1,1)[12] 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##          ar1     ar2      ma1      ma2     ma3     sma1
##       0.7175  0.2757  -0.1396  -0.1316  0.2513  -0.7933
## s.e.  0.2050  0.2042   0.1976   0.0941  0.0655   0.0631
## 
## sigma^2 = 0.004583:  log likelihood = 240.84
## AIC=-467.68   AICc=-467.07   BIC=-444.88
# Ljung-Box residual diagnostics for fit_6.
checkresiduals(fit_6)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,3)(0,1,1)[12]
## Q* = 24.411, df = 18, p-value = 0.142
## 
## Model df: 6.   Total lags used: 24
# 2019 forecast from fit_6 and test-set accuracy.
predict_sarima <- forecast(fit_6,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                      ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.02061077 0.6086834 0.4447737 0.0595886 4.245419 0.3472344
## Test set     0.40535209 0.9842110 0.8406341 2.0767618 4.821359 0.6562823
##                     ACF1 Theil's U
## Training set -0.05948158        NA
## Test set     -0.22344541  0.568211
# NOTE(review): duplicate of the checkresiduals(fit_6) call above — redundant.
checkresiduals(fit_6)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,0,3)(0,1,1)[12]
## Q* = 24.411, df = 18, p-value = 0.142
## 
## Model df: 6.   Total lags used: 24
# AR(4) with a single seasonal MA term and drift.
fit_7 <- Arima(y = import_train_set, order = c(4,0,0), seasonal = c(0,1,1), lambda = BoxCox.lambda(import_train_set), include.drift = TRUE)
fit_7
## Series: import_train_set 
## ARIMA(4,0,0)(0,1,1)[12] with drift 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##          ar1     ar2     ar3      ar4     sma1   drift
##       0.5693  0.1827  0.3807  -0.2087  -0.7710  0.0077
## s.e.  0.0708  0.0775  0.0804   0.0758   0.0667  0.0014
## 
## sigma^2 = 0.004442:  log likelihood = 244.65
## AIC=-475.31   AICc=-474.7   BIC=-452.51
# Ljung-Box residual diagnostics for fit_7.
checkresiduals(fit_7)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,0)(0,1,1)[12] with drift
## Q* = 16.364, df = 19, p-value = 0.6329
## 
## Model df: 5.   Total lags used: 24
# 2019 forecast from fit_7 and test-set accuracy.
predict_sarima <- forecast(fit_7,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                        ME      RMSE       MAE         MPE     MAPE      MASE
## Training set -0.003872741 0.5988067 0.4371195 -0.07583659 4.190418 0.3412589
## Test set     -0.277760437 0.9224962 0.6880363 -1.78926492 4.029064 0.5371494
##                     ACF1 Theil's U
## Training set -0.07013443        NA
## Test set     -0.20968457  0.472439
# fit_7 without the drift term.
fit_8 <- Arima(y = import_train_set, order = c(4,0,0), seasonal = c(0,1,1), lambda = BoxCox.lambda(import_train_set), include.drift = FALSE)
fit_8
## Series: import_train_set 
## ARIMA(4,0,0)(0,1,1)[12] 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##          ar1     ar2     ar3      ar4     sma1
##       0.5909  0.1985  0.3934  -0.1895  -0.7823
## s.e.  0.0714  0.0781  0.0811   0.0758   0.0640
## 
## sigma^2 = 0.00452:  log likelihood = 241.92
## AIC=-471.83   AICc=-471.38   BIC=-452.29
# Ljung-Box residual diagnostics for fit_8.
checkresiduals(fit_8)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,0,0)(0,1,1)[12]
## Q* = 19.065, df = 19, p-value = 0.4527
## 
## Model df: 5.   Total lags used: 24
# 2019 forecast from fit_8 and test-set accuracy.
predict_sarima <- forecast(fit_8,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                      ME      RMSE       MAE        MPE     MAPE      MASE
## Training set 0.01760149 0.6054706 0.4379557 0.03092314 4.186247 0.3419116
## Test set     0.25387064 0.9207406 0.7712777 1.21557371 4.457958 0.6021359
##                     ACF1 Theil's U
## Training set -0.06694881        NA
## Test set     -0.25158280 0.5142314
# No-seasonal-differencing candidate: AR(4) on first differences with drift.
fit_10 <- Arima(y = import_train_set, order = c(4,1,0), seasonal = c(0,0,1), lambda = BoxCox.lambda(import_train_set), include.drift = TRUE)
fit_10
## Series: import_train_set 
## ARIMA(4,1,0)(0,0,1)[12] with drift 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##           ar1      ar2     ar3      ar4    sma1   drift
##       -0.2311  -0.1887  0.0700  -0.0756  0.4449  0.0086
## s.e.   0.0706   0.0741  0.0754   0.0733  0.0581  0.0060
## 
## sigma^2 = 0.007642:  log likelihood = 208.31
## AIC=-402.62   AICc=-402.05   BIC=-379.43
# Ljung-Box diagnostics for fit_10: tiny p-value below => residual autocorrelation.
checkresiduals(fit_10)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(4,1,0)(0,0,1)[12] with drift
## Q* = 71.672, df = 19, p-value = 4.839e-08
## 
## Model df: 5.   Total lags used: 24
# 2019 forecast from fit_10 and test-set accuracy.
predict_sarima <- forecast(fit_10,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                        ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.006955019 0.8027907 0.5832870 -0.2908851 5.782355 0.4553717
## Test set     -0.091865212 1.1848420 0.9524353 -1.2207310 5.845635 0.7435654
##                     ACF1 Theil's U
## Training set -0.06197456        NA
## Test set      0.19338779 0.6993478
# ARMA(3,1) on first differences with one seasonal MA term, no drift.
fit_11 <- Arima(y = import_train_set, order = c(3,1,1), seasonal = c(0,0,1), lambda = BoxCox.lambda(import_train_set), include.drift = FALSE)
fit_11
## Series: import_train_set 
## ARIMA(3,1,1)(0,0,1)[12] 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##           ar1      ar2     ar3     ma1    sma1
##       -0.7385  -0.2873  0.0072  0.5197  0.4568
## s.e.   0.3937   0.1332  0.1242  0.3874  0.0561
## 
## sigma^2 = 0.007667:  log likelihood = 207.39
## AIC=-402.79   AICc=-402.36   BIC=-382.91
# Ljung-Box diagnostics for fit_11: tiny p-value below => residual autocorrelation.
checkresiduals(fit_11)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(3,1,1)(0,0,1)[12]
## Q* = 70.774, df = 19, p-value = 6.836e-08
## 
## Model df: 5.   Total lags used: 24
# 2019 forecast from fit_11 and test-set accuracy.
predict_sarima <- forecast(fit_11,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                     ME      RMSE       MAE       MPE     MAPE      MASE
## Training set 0.0606049 0.8038033 0.5910564 0.3878053 5.823694 0.4614372
## Test set     0.5775210 1.5335435 1.2703004 2.5917797 7.484240 0.9917225
##                     ACF1 Theil's U
## Training set -0.06776791        NA
## Test set      0.35677627 0.9047817
# Simple benchmark SARIMA with drift (same spec as the post-COVID fit_x).
fit_x <- Arima(y = import_train_set, order = c(2,1,0), seasonal = c(1,0,0), lambda = BoxCox.lambda(import_train_set), include.drift = TRUE)
fit_x
## Series: import_train_set 
## ARIMA(2,1,0)(1,0,0)[12] with drift 
## Box Cox transformation: lambda= 0.06440774 
## 
## Coefficients:
##           ar1      ar2    sar1   drift
##       -0.3451  -0.2351  0.6228  0.0087
## s.e.   0.0697   0.0693  0.0560  0.0085
## 
## sigma^2 = 0.006302:  log likelihood = 225.23
## AIC=-440.46   AICc=-440.15   BIC=-423.89
# Ljung-Box diagnostics for fit_x: small p-value below => residual autocorrelation.
checkresiduals(fit_x)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)(1,0,0)[12] with drift
## Q* = 48.053, df = 21, p-value = 0.0006764
## 
## Model df: 3.   Total lags used: 24
# 2019 forecast from fit_x and test-set accuracy.
predict_sarima <- forecast(fit_x,h=12,level=c(80, 95))
import_new <- window(import,start = c(2002,01), end = c(2019,12))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                        ME      RMSE       MAE        MPE     MAPE      MASE
## Training set -0.008626911 0.7246152 0.5394976 -0.2717089 5.292135 0.4211853
## Test set     -0.045949977 1.0411485 0.8827482 -0.7369543 5.304352 0.6891608
##                     ACF1 Theil's U
## Training set -0.03940972        NA
## Test set     -0.12484506 0.5790083
# NOTE(review): duplicate of the checkresiduals(fit_x) call above — redundant.
checkresiduals(fit_x)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,0)(1,0,0)[12] with drift
## Q* = 48.053, df = 21, p-value = 0.0006764
## 
## Model df: 3.   Total lags used: 24
# Seasonal-naive benchmark for the pre-COVID horizon (same month last year).
import_new <- window(import,start = c(2002,01), end = c(2019,12))
predict_sarima <- snaive(import_train_set,h=12,level=c(80, 95))
autoplot(import_new) + autolayer(predict_sarima$mean)

accuracy(predict_sarima, import_test_set)
##                     ME     RMSE      MAE      MPE      MAPE      MASE
## Training set 0.7617909 1.527044 1.280903 7.136700 12.594347 1.0000000
## Test set     0.4499564 1.248044 1.035570 2.415875  5.877225 0.8084685
##                    ACF1 Theil's U
## Training set  0.7750122        NA
## Test set     -0.3960617 0.7297898